library(data.table)
library(dtplyr)
library(tidyverse)
library(ffanalytics)
library(lpSolve)
library(rPref)
library(kableExtra)
library(plotly)
library(tictoc)

Set Up

We’ll set the week, as well as all of FanDuel’s scoring rules.

week <- 2

scoring <- list(
  pass = list(
    pass_att = 0, pass_comp = 0, pass_inc = 0, pass_yds = 0.04, pass_tds = 4,
    pass_int = -1, pass_40_yds = 0,  pass_300_yds = 0, pass_350_yds = 0,
    pass_400_yds = 0
  ),
  rush = list(
    all_pos = TRUE,
    rush_yds = 0.1,  rush_att = 0, rush_40_yds = 0, rush_tds = 6,
    rush_100_yds = 0, rush_150_yds = 0, rush_200_yds = 0),
  rec = list(
    all_pos = TRUE,
    rec = 0.5, rec_yds = 0.1, rec_tds = 6, rec_40_yds = 0, rec_100_yds = 0,
    rec_150_yds = 0, rec_200_yds = 0
  ),
  misc = list(
    all_pos = TRUE,
    fumbles_lost = -2, fumbles_total = 0,
    sacks = 0, two_pts = 2
  ),
  ret = list(
    all_pos = TRUE,
    return_tds = 6, return_yds = 0
  ),
  dst = list(
    dst_fum_rec = 2,  dst_int = 2, dst_safety = 2, dst_sacks = 1, dst_td = 6,
    dst_blk = 2, dst_ret_yds = 0, dst_pts_allowed = 0
  ),
  pts_bracket = list(
    list(threshold = 0, points = 10),
    list(threshold = 1, points = 7),
    list(threshold = 7, points = 4),
    list(threshold = 14, points = 1),
    list(threshold = 21, points = 0),
    list(threshold = 28, points = -1),
    list(threshold = 35, points = -4)
  )
)

Data Scrape & Clean

sources <- c('CBS', 'ESPN', 'Yahoo', 'FantasySharks', 'FantasyPros', 'FantasyData', 'FleaFlicker')

scrape <- scrape_data(src = sources,
                      pos=c('QB', 'RB', 'WR', 'TE', 'DST'),
                      season = 2020, 
                      week = week)
projections <- projections_table(scrape, scoring_rules = scoring) %>%
  add_player_info()

knitr::kable(head(projections))
id first_name last_name team position age exp pos avg_type points pos_rank drop_off sd_pts floor ceiling tier
0501 Buffalo Bills BUF DST NA 50 DST average 6.353333 14 0.0666667 0.0923760 6.30 6.444000 12
0501 Buffalo Bills BUF DST NA 50 DST robust 6.340000 13 0.0575000 0.0000000 6.30 6.444000 12
0501 Buffalo Bills BUF DST NA 50 DST weighted 6.369298 14 0.0740662 0.1131371 6.30 6.441529 12
0502 Indianapolis Colts IND DST NA 50 DST average 7.366667 4 0.3650000 0.5291503 6.78 7.680000 5
0502 Indianapolis Colts IND DST NA 50 DST robust 7.450000 4 0.6150000 0.2965200 6.78 7.680000 5
0502 Indianapolis Colts IND DST NA 50 DST weighted 7.266887 5 0.0739735 0.5656854 6.70 7.429439 5

The goal is to figure out a data scrape, so all I have to do is run it.

fan_duel <- read_csv("~/Fall_2020/MATH-390/Data/FanDuel-NFL-2020-09-20-49877-players-list.csv") %>% 
  filter(is.na(`Injury Indicator`) | `Injury Indicator` == "Q") %>% 
  mutate(`Last Name` = str_remove_all(`Last Name`, "((?i)III(?-i))"),
         `Last Name` = str_remove_all(`Last Name`, "((?i)II(?-i))"),
         `Last Name` = str_remove_all(`Last Name`, "((?i)IV(?-i))"),
         `Last Name` = str_remove_all(`Last Name`, "((?i)V(?-i))"),
         `Last Name` = str_remove_all(`Last Name`, "((?i)Jr.(?-i))"),
         `Last Name` = str_remove_all(`Last Name`, "((?i)Sr.(?-i))"),
         Name = str_c(`First Name`, `Last Name`, sep = " ")) %>% 
  select(-c(Id, Tier, X15, X16)) %>% 
  mutate(position = case_when(
    Position == "D" ~ "DST",
    TRUE ~ as.character(Position)
  ))

knitr::kable(head(fan_duel))
Position First Name Nickname Last Name FPPG Played Salary Game Team Opponent Injury Indicator Injury Details Name position
RB Christian Christian McCaffrey McCaffrey 27.00 1 10500 CAR TB NA NA Christian McCaffrey RB
QB Lamar Lamar Jackson Jackson 27.50 1 9500 BAL HOU NA NA Lamar Jackson QB
QB Patrick Patrick Mahomes Mahomes 20.44 1 9000 KC LAC NA NA Patrick Mahomes QB
RB Saquon Saquon Barkley Barkley 9.60 1 9000 NYG CHI NA NA Saquon Barkley RB
RB Dalvin Dalvin Cook Cook 21.30 1 8800 MIN IND NA NA Dalvin Cook RB
WR Davante Davante Adams Adams 34.60 1 8600 GB DET NA NA Davante Adams WR

Lineup Generator Function

Our wonderufl linear prgramming-driven function. We set constraints here.

generate_lineup <- function(n){
  pred_sal <- projections %>% 
    filter(avg_type == "robust") %>% 
    mutate(Name = str_c(first_name, last_name, sep = " ")) %>% 
    inner_join(fan_duel, by = c("Name", "position")) %>% 
    select(position, Name, team, points, Salary, sd_pts) %>% 
    drop_na(points, Salary) %>% 
    group_by(Name) %>% 
    mutate(sal_max = max(Salary)) %>%
    filter(Salary == sal_max) %>%
    group_by(Name) %>% 
    mutate(pts_pred = rnorm(1, points, sd_pts), lineup = n) %>% 
    select(-sal_max)
  
  
    obj <- pred_sal$pts_pred
    
    mat <- rbind(t(model.matrix(~ position + 0,pred_sal)), 
               t(model.matrix(~ position + 0,pred_sal)), 
               rep(1, nrow(pred_sal)), pred_sal$Salary)
    
    dir <- c("=","=","<=","<=","<=", "=","=",">=",">=",">=","=","<=")
    
    rhs <- c(1, 1, 3, 2, 4, 1, 1, 2, 1, 3, 9, 60000)
  
  result <- lp("max", obj, mat, dir, rhs, all.bin = TRUE)   
  
  results <- pred_sal[which(result$solution == 1),]
  
  return(results)
}

Simulation Time!

We’ll iterate our lineup generator function 1,000 times (usually we’ll do this 10,000, but this is an example).

tic()
sim_lu <- map_df(1:1000, generate_lineup) %>%
  rename(pts_base = points) %>%  
  mutate(position = factor(position, 
                           levels = c("QB", "RB", "WR", "TE", "DST"))) %>% 
  select(lineup, Name, team, position, pts_base, pts_pred, sd_pts, Salary)
toc()
## 98.434 sec elapsed

Results/Exploration

Looking at the first three lineups from our simulation.

sim_lu %>%
  filter(lineup<=3) %>%
  arrange(lineup, position, desc(pts_pred)) %>%
  knitr::kable() %>%
  kable_styling() %>%
  column_spec(1, bold=TRUE) %>%
  collapse_rows(columns = 1, valign = 'top')
lineup Name team position pts_base pts_pred sd_pts Salary
1 Kyler Murray ARI QB 24.35641 23.100515 1.5493170 8000
Ezekiel Elliott DAL RB 18.24412 18.563206 0.2412823 8600
Miles Sanders PHI RB 15.64687 16.565535 2.4314640 6800
Austin Ekeler LAC RB 15.45477 16.343482 0.6063456 6900
DeAndre Hopkins ARI WR 17.50500 18.124973 1.3788180 8300
Adam Thielen MIN WR 14.16750 14.346877 2.2239000 7300
Preston Williams MIA WR 9.25000 11.307534 1.7346420 5400
Eric Ebron PIT TE 6.60250 9.181332 0.8895600 4900
Dallas Cowboys DAL DST 5.34000 8.577149 1.5715560 3700
2 Lamar Jackson BAL QB 25.20161 27.798769 1.5552474 9500
Ezekiel Elliott DAL RB 18.24412 18.318281 0.2412823 8600
Derrick Henry TEN RB 17.89731 17.966000 0.1111950 8300
Jerick McKinnon SFO RB 12.57500 15.100362 2.7798750 4900
Stefon Diggs BUF WR 13.07250 13.768757 0.6597570 6800
Darius Slayton NYG WR 11.03000 11.113423 0.0741300 5300
Breshad Perriman NYJ WR 7.74000 11.078554 3.1431120 5200
Travis Kelce KCC TE 13.28250 14.237297 1.2083190 7800
Indianapolis Colts IND DST 7.45000 7.270704 0.2965200 3600
3 Kyler Murray ARI QB 24.35641 22.953642 1.5493170 8000
Ezekiel Elliott DAL RB 18.24412 18.272529 0.2412823 8600
Derrick Henry TEN RB 17.89731 17.862217 0.1111950 8300
Jerick McKinnon SFO RB 12.57500 15.715141 2.7798750 4900
Tyreek Hill KCC WR 14.53750 15.613454 1.0452330 8000
Adam Thielen MIN WR 14.16750 15.604737 2.2239000 7300
Marquise Brown BAL WR 11.05750 15.276457 1.4677740 6200
Dalton Schultz DAL TE 6.50250 8.646957 1.4010570 4000
Pittsburgh Steelers PIT DST 8.87500 8.875000 0.0000000 4600

Next, we’ll look at which players were the most commonly chosen by our model.

ggplotly(sim_lu %>% 
           group_by(Name, position, Salary) %>% 
           dplyr::summarize(lu = n_distinct(lineup)) %>% 
           ungroup() %>% 
           group_by(position) %>% 
           top_n(10, lu) %>% 
           ungroup() %>% 
           arrange(position, desc(lu)) %>% 
           mutate(Name = factor(Name),
                  Name = fct_reorder(Name, lu)) %>% 
           ggplot(aes(x = Name, y = round(lu / 1000, 2), fill = Salary,
                      text = paste(Name, "in", lu, "lineups with", Salary, "salary"))) +
           geom_bar(stat = "identity") +
           facet_wrap(~position, ncol = 2, scales = "free_y") +
           coord_flip() +
           scale_fill_viridis_c() +
           xlab("") +
           ylab("Lineups (thousands)") +
           ggtitle("Top 10 Players By Position")) %>% 
  ggplotly(tooltip = "text")

We’ll explore the same question as above, but in a different form.

plyr_lu <- sim_lu %>%
  group_by(Name, position) %>%
  dplyr::summarize(lu = n_distinct(lineup)) %>%
  ungroup() 

ggplotly(projections %>% 
           filter(avg_type == "weighted") %>%
           mutate(Name = str_c(first_name, last_name, sep = " ")) %>% 
           inner_join(fan_duel, by = c("Name", "position")) %>%
           select(Name, team, position, points, Salary, sd_pts) %>%
           left_join(plyr_lu, by = 'Name') %>%
           replace_na(list(lu = 0)) %>%
           mutate(lu_bin = case_when(
             lu == 0 ~ "0 Lineups",
             TRUE ~ ">=1 Lineup"), 
             lu_5 = cut(lu, 5, labels = FALSE)) %>%              
           ggplot(aes(x=Salary, y=points, color=lu_bin, size=sd_pts, text=Name)) +
           geom_point() +
           theme_minimal() +
           scale_color_manual(values = c('red', 'blue'), name="") +
           geom_smooth(inherit.aes = FALSE, aes(x = Salary, y = points), method = 'lm') +
           ylab('Projected Points') +
           xlab('Salary') +
           ggtitle('Who makes it into Optimized Lineups?') +
           scale_x_continuous(labels=scales::dollar))

We’ll check our which position our model favors for the FLEX spot. Since this is only 0.5 PPR, there will be a blend of high upside RBs and solid, undervalued WRs. If this was full PPR, there may be some TEs, but I’d be surprised if there were too many tight ends here.

sim_lu %>%
  group_by(lineup) %>%
  mutate(lineup_pts=sum(pts_pred)) %>%
  group_by(lineup, position) %>%
  mutate(n = n()) %>%
  select(lineup, position, n, lineup_pts) %>%
  distinct() %>%
  spread(key=position, value=n) %>%
  filter(RB>=2, TE>=1, WR>=3) %>%
  mutate(flex=case_when(RB==3 ~ 'RB',
                        TE==2 ~ 'TE',
                        WR==4 ~ 'WR')) %>%
  group_by(flex) %>%
  dplyr::summarize(pts=median(lineup_pts),
                   cases=n()) %>%
  knitr::kable() %>%
  kable_styling(full_width = FALSE)
flex pts cases
RB 139.1187 980
WR 141.2564 20

Now we’ll look at our lineups and “bold” our Pareto lineups. These are the lineups where the points is maximized, while the uncertainty is minimzed. These are good for the cash games.

lu_df <- sim_lu %>%
  group_by(lineup) %>%
  dplyr::summarize(lineup_pts=sum(pts_pred),
                   lineup_sd=sum(sd_pts)) %>%
  ungroup()

pto <- psel(lu_df, low(lineup_sd) * high(lineup_pts))


ggplot(lu_df, aes(y=lineup_pts, x=lineup_sd)) +
  geom_point() +
  geom_point(data=pto, size=5) +
  ylab('Lineup Points') +
  xlab('Lineup Points St Dev') +
  ggtitle('Lineup Points vs Uncertainty',
          subtitle = 'Pareto Lineups Bolded')

Not let’s look at some of the “best” Pareto lineups. In other words, those that achieve the optimization as described above.

psel(lu_df, low(lineup_sd) * high(lineup_pts)) %>%
  left_join(sim_lu, by='lineup') %>%
  group_by(lineup) %>%
  arrange(lineup_pts, position, desc(Salary)) %>%
  select(lineup, lineup_pts, lineup_sd, Name, team, position, pts_pred, sd_pts, Salary) %>%
  mutate_at(vars(lineup_pts, lineup_sd, pts_pred, sd_pts), function(x) round(x, 2)) %>%
  knitr::kable() %>%
  kable_styling(fixed_thead = T) %>%
  column_spec(1:3, bold=TRUE) %>%
  collapse_rows(columns = 1:3, valign = 'top') %>%
  scroll_box(height = '700px', width = '100%')
lineup lineup_pts lineup_sd Name team position pts_pred sd_pts Salary
374 136.86 4.55 Kyler Murray ARI QB 27.36 1.55 8000
Ezekiel Elliott DAL RB 18.53 0.24 8600
Derrick Henry TEN RB 17.93 0.11 8300
Austin Ekeler LAC RB 15.36 0.61 6900
DeAndre Hopkins ARI WR 18.63 1.38 8300
Robby Anderson CAR WR 10.91 0.40 5900
Darius Slayton NYG WR 11.07 0.07 5300
Jonnu Smith TEN TE 8.96 0.19 4900
Tampa Bay Buccaneers TBB DST 8.10 0.00 3800
756 137.83 5.66 Kyler Murray ARI QB 26.13 1.55 8000
Ezekiel Elliott DAL RB 18.20 0.24 8600
Austin Ekeler LAC RB 17.58 0.61 6900
Kenyan Drake ARI RB 13.96 0.23 6600
DeAndre Hopkins ARI WR 18.16 1.38 8300
Calvin Ridley ATL WR 15.53 1.21 7100
Darius Slayton NYG WR 10.99 0.07 5300
Noah Fant DEN TE 9.19 0.38 5300
Tampa Bay Buccaneers TBB DST 8.10 0.00 3800
419 138.58 6.02 Kyler Murray ARI QB 25.79 1.55 8000
Ezekiel Elliott DAL RB 18.00 0.24 8600
Derrick Henry TEN RB 17.88 0.11 8300
Austin Ekeler LAC RB 16.00 0.61 6900
DeAndre Hopkins ARI WR 20.32 1.38 8300
Stefon Diggs BUF WR 14.53 0.66 6800
Darius Slayton NYG WR 10.88 0.07 5300
Dalton Schultz DAL TE 7.10 1.40 4000
Tampa Bay Buccaneers TBB DST 8.10 0.00 3800
982 139.16 6.55 Kyler Murray ARI QB 24.98 1.55 8000
Derrick Henry TEN RB 17.81 0.11 8300
Austin Ekeler LAC RB 16.27 0.61 6900
Kenyan Drake ARI RB 13.75 0.23 6600
DeAndre Hopkins ARI WR 18.50 1.38 8300
Adam Thielen MIN WR 18.79 2.22 7300
Darius Slayton NYG WR 11.02 0.07 5300
Dallas Goedert PHI TE 9.95 0.38 5500
Tampa Bay Buccaneers TBB DST 8.10 0.00 3800
312 142.12 7.69 Carson Wentz PHI QB 21.45 1.52 7300
Ezekiel Elliott DAL RB 18.95 0.24 8600
Derrick Henry TEN RB 17.84 0.11 8300
Jerick McKinnon SFO RB 19.62 2.78 4900
DeAndre Hopkins ARI WR 18.40 1.38 8300
Calvin Ridley ATL WR 15.91 1.21 7100
Darius Slayton NYG WR 11.15 0.07 5300
Dallas Goedert PHI TE 9.92 0.38 5500
Pittsburgh Steelers PIT DST 8.88 0.00 4600
841 142.79 7.74 Kyler Murray ARI QB 27.20 1.55 8000
Derrick Henry TEN RB 17.86 0.11 8300
Austin Ekeler LAC RB 15.79 0.61 6900
Miles Sanders PHI RB 15.67 2.43 6800
DeAndre Hopkins ARI WR 19.02 1.38 8300
Calvin Ridley ATL WR 17.93 1.21 7100
Darius Slayton NYG WR 11.18 0.07 5300
Dallas Goedert PHI TE 10.04 0.38 5500
Tampa Bay Buccaneers TBB DST 8.10 0.00 3800
776 143.86 8.66 Kyler Murray ARI QB 25.08 1.55 8000
Derrick Henry TEN RB 17.77 0.11 8300
Austin Ekeler LAC RB 16.68 0.61 6900
Jerick McKinnon SFO RB 18.73 2.78 4900
DeAndre Hopkins ARI WR 16.66 1.38 8300
Stefon Diggs BUF WR 14.27 0.66 6800
Darius Slayton NYG WR 10.92 0.07 5300
Travis Kelce KCC TE 15.87 1.21 7800
Indianapolis Colts IND DST 7.88 0.30 3600
58 146.95 9.17 Kyler Murray ARI QB 21.26 1.55 8000
Derrick Henry TEN RB 17.79 0.11 8300
Austin Ekeler LAC RB 15.81 0.61 6900
Miles Sanders PHI RB 22.29 2.43 6800
Adam Thielen MIN WR 22.23 2.22 7300
Calvin Ridley ATL WR 16.43 1.21 7100
Terry McLaurin WAS WR 12.89 0.36 6500
Dallas Goedert PHI TE 9.81 0.38 5500
Indianapolis Colts IND DST 8.45 0.30 3600
178 148.04 10.42 Kyler Murray ARI QB 28.33 1.55 8000
Ezekiel Elliott DAL RB 18.56 0.24 8600
Austin Ekeler LAC RB 15.21 0.61 6900
Jerick McKinnon SFO RB 19.54 2.78 4900
DeAndre Hopkins ARI WR 18.71 1.38 8300
Tyreek Hill KCC WR 16.15 1.05 8000
Marquise Brown BAL WR 13.00 1.47 6200
Mike Gesicki MIA TE 10.44 1.35 5200
Tampa Bay Buccaneers TBB DST 8.10 0.00 3800
273 152.58 11.28 Kyler Murray ARI QB 28.09 1.55 8000
Ezekiel Elliott DAL RB 18.43 0.24 8600
James Conner PIT RB 16.36 1.71 6900
Jerick McKinnon SFO RB 19.59 2.78 4900
DeAndre Hopkins ARI WR 20.40 1.38 8300
Adam Thielen MIN WR 17.12 2.22 7300
Calvin Ridley ATL WR 15.60 1.21 7100
Jonnu Smith TEN TE 8.89 0.19 4900
Tampa Bay Buccaneers TBB DST 8.10 0.00 3800
985 152.75 15.02 Kyler Murray ARI QB 26.39 1.55 8000
Ezekiel Elliott DAL RB 18.93 0.24 8600
Miles Sanders PHI RB 19.04 2.43 6800
Jerick McKinnon SFO RB 13.43 2.78 4900
DeAndre Hopkins ARI WR 19.21 1.38 8300
Calvin Ridley ATL WR 15.34 1.21 7100
Braxton Berrios NYJ WR 17.91 4.23 4500
Travis Kelce KCC TE 14.40 1.21 7800
Tampa Bay Buccaneers TBB DST 8.10 0.00 3800